{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total pages in PDF: 398\n"
     ]
    }
   ],
   "source": [
    "import fitz  # PyMuPDF\n",
    "from PIL import Image\n",
    "from docx import Document\n",
    "from docx.shared import Inches\n",
    "import os\n",
    "\n",
    "def extract_pages_from_pdf(input_pdf_path, output_image_folder):\n",
    "    \"\"\"Render every page of a PDF to ``page_<n>.png`` files in a folder.\n",
    "\n",
    "    The total page count is printed before rendering starts. File names\n",
    "    are numbered from 1, so ``page_1.png`` is the first page.\n",
    "\n",
    "    Args:\n",
    "        input_pdf_path: Path of the PDF file to open with PyMuPDF.\n",
    "        output_image_folder: Directory that receives the PNG files; it is\n",
    "            created (including parents) when it does not exist yet.\n",
    "    \"\"\"\n",
    "    # exist_ok avoids the check-then-create race of a separate exists() test.\n",
    "    os.makedirs(output_image_folder, exist_ok=True)\n",
    "\n",
    "    pdf_document = fitz.open(input_pdf_path)\n",
    "    try:\n",
    "        print(\"Total pages in PDF:\", pdf_document.page_count)\n",
    "\n",
    "        for i in range(pdf_document.page_count):\n",
    "            image = generate_image_from_page(pdf_document, i)\n",
    "            image.save(os.path.join(output_image_folder, f\"page_{i+1}.png\"))\n",
    "    finally:\n",
    "        # Release the PDF even when rendering or saving a page fails.\n",
    "        pdf_document.close()\n",
    "\n",
    "def generate_image_from_page(pdf_document, page_number, zoom=1.0):\n",
    "    \"\"\"Render one page of an open PDF as an RGB Pillow image.\n",
    "\n",
    "    Args:\n",
    "        pdf_document: Open PyMuPDF document.\n",
    "        page_number: Zero-based index of the page to render.\n",
    "        zoom: Scale factor applied to both axes. The default 1.0 keeps\n",
    "            the library's default resolution; larger values (e.g. 2.0)\n",
    "            give a sharper image at the cost of memory and file size.\n",
    "\n",
    "    Returns:\n",
    "        A ``PIL.Image.Image`` in mode \"RGB\".\n",
    "    \"\"\"\n",
    "    page = pdf_document[page_number]\n",
    "\n",
    "    # alpha=False keeps 3 bytes per pixel, matching the \"RGB\" mode below.\n",
    "    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)\n",
    "\n",
    "    return Image.frombytes(\"RGB\", (pix.width, pix.height), pix.samples)\n",
    "\n",
    "def insert_images_into_word(image_folder, output_word_path):\n",
    "    \"\"\"Build a Word document from the ``page_<n>.png`` images in a folder.\n",
    "\n",
    "    Only files named ``page_<n>.png`` are used, inserted in ascending\n",
    "    numeric order of ``<n>``. Every other entry in the folder is ignored,\n",
    "    so the folder may also contain unrelated files.\n",
    "\n",
    "    Args:\n",
    "        image_folder: Directory that holds the page images.\n",
    "        output_word_path: Path the resulting .docx file is saved to.\n",
    "    \"\"\"\n",
    "    def page_number(file_name):\n",
    "        # Return <n> for \"page_<n>.png\", or None for any other name.\n",
    "        stem, extension = os.path.splitext(file_name)\n",
    "        digits = stem[len(\"page_\"):]\n",
    "        if extension.lower() == \".png\" and stem.startswith(\"page_\") and digits.isdecimal():\n",
    "            return int(digits)\n",
    "        return None\n",
    "\n",
    "    # Counting every directory entry would request page files that do not\n",
    "    # exist as soon as the folder holds anything besides the page images.\n",
    "    numbered = [(page_number(name), name) for name in os.listdir(image_folder)]\n",
    "    pages = sorted((n, name) for n, name in numbered if n is not None)\n",
    "\n",
    "    new_doc = Document()\n",
    "    for _, name in pages:\n",
    "        insert_image_into_word(new_doc, os.path.join(image_folder, name))\n",
    "\n",
    "    new_doc.save(output_word_path)\n",
    "\n",
    "def insert_image_into_word(doc, image_path, width_inches=6.0):\n",
    "    \"\"\"Append one picture to a python-docx document.\n",
    "\n",
    "    Args:\n",
    "        doc: Document object the picture is added to.\n",
    "        image_path: Path of the image file to insert.\n",
    "        width_inches: Display width of the picture in inches. Only the\n",
    "            width is passed, so python-docx scales the height to keep\n",
    "            the aspect ratio. Defaults to 6.0.\n",
    "    \"\"\"\n",
    "    doc.add_picture(image_path, width=Inches(width_inches))\n",
    "\n",
    "# --- Paths used by this run ---\n",
    "# Source PDF whose pages are rendered.\n",
    "input_pdf_path = \"/Users/wanghao4799/Documents/目录.pdf\"\n",
    "# Folder that receives the page_<n>.png files.\n",
    "# NOTE(review): this is the same directory that holds the source PDF; a\n",
    "# dedicated sub-folder would keep the page images separate.\n",
    "output_image_folder = \"/Users/wanghao4799/Documents\"\n",
    "# Word document assembled from the rendered pages.\n",
    "output_word_path = \"/Users/wanghao4799/Documents/【报告】/【佐证清单处理】/考核指标/output.docx\"\n",
    "\n",
    "# Step 1: render each PDF page to a PNG file.\n",
    "extract_pages_from_pdf(input_pdf_path, output_image_folder)\n",
    "\n",
    "# Step 2: collect the PNG files into a new Word document.\n",
    "insert_images_into_word(output_image_folder, output_word_path)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
